In [2]:
options(warn = -1)
options(repr.plot.width=15, repr.plot.height=9) 
Sys.setlocale(category = "LC_ALL", locale = "ko_KR.UTF-8")
'ko_KR.UTF-8/ko_KR.UTF-8/ko_KR.UTF-8/C/ko_KR.UTF-8/C'
In [6]:
#install.packages("extrafont")
Error in pdf("output.pdf", width = 8, height = 6, family = "AppleGothic"): 알 수 없는 페밀리 'AppleGothic'입니다
Traceback:

1. pdf("output.pdf", width = 8, height = 6, family = "AppleGothic")
2. stop(gettextf("unknown family '%s'", family), domain = NA)

서울시 열린 데이터 광장

출처 : http://data.seoul.go.kr/dataList/OA-12914/S/1/datasetView.do

In [3]:
PROJECT01 <- read.csv("./Data/CARD_SUBWAY_MONTH_2020.CSV", row.names = NULL,fileEncoding = "EUC-KR")
colnames(PROJECT01) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자")
PROJECT02 <- read.csv("./Data/CARD_SUBWAY_MONTH_2021.CSV", row.names = NULL,fileEncoding = "EUC-KR")
colnames(PROJECT02) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT02 <- PROJECT02[-length(PROJECT02)]
PROJECT03 <- read.csv("./Data/CARD_SUBWAY_MONTH_2022.CSV", row.names = NULL,fileEncoding = "EUC-KR")
colnames(PROJECT03) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT03 <- PROJECT03[-length(PROJECT03)]
PROJECT04 <- read.csv("./Data/CARD_SUBWAY_MONTH_202301.CSV", row.names = NULL)
colnames(PROJECT04) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT04 <- PROJECT04[-length(PROJECT04)]
PROJECT05 <- read.csv("./Data/CARD_SUBWAY_MONTH_202302.CSV", row.names = NULL)
colnames(PROJECT05) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT05 <- PROJECT05[-length(PROJECT05)]
PROJECT06 <- read.csv("./Data/CARD_SUBWAY_MONTH_202303.CSV", row.names = NULL)
colnames(PROJECT06) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT06 <- PROJECT06[-length(PROJECT06)]
PROJECT07 <- read.csv("./Data/CARD_SUBWAY_MONTH_202304.CSV", row.names = NULL)
colnames(PROJECT07) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT07 <- PROJECT07[-length(PROJECT07)]
PROJECT08 <- read.csv("./Data/CARD_SUBWAY_MONTH_202305.CSV", row.names = NULL)
colnames(PROJECT08) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT08 <- PROJECT08[-length(PROJECT08)]
PROJECT09 <- read.csv("./Data/CARD_SUBWAY_MONTH_202306.CSV", row.names = NULL)
colnames(PROJECT09) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT09 <- PROJECT09[-length(PROJECT09)]
PROJECT10 <- read.csv("./Data/CARD_SUBWAY_MONTH_202307.CSV", row.names = NULL)
colnames(PROJECT10) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT10 <- PROJECT10[-length(PROJECT10)]
PROJECT11 <- read.csv("./Data/CARD_SUBWAY_MONTH_202308.CSV", row.names = NULL)
colnames(PROJECT11) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT11 <- PROJECT11[-length(PROJECT11)]
PROJECT12 <- read.csv("./Data/CARD_SUBWAY_MONTH_202309.CSV", row.names = NULL)
colnames(PROJECT12) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT12 <- PROJECT12[-length(PROJECT12)]
PROJECT13 <- read.csv("./Data/CARD_SUBWAY_MONTH_202310.CSV", row.names = NULL)
colnames(PROJECT13) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT13 <- PROJECT13[-length(PROJECT13)]
PROJECT14 <- read.csv("./Data/CARD_SUBWAY_MONTH_202311.CSV", row.names = NULL)
colnames(PROJECT14) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT14 <- PROJECT14[-length(PROJECT14)]
PROJECT15 <- read.csv("./Data/CARD_SUBWAY_MONTH_202312.CSV", row.names = NULL)
colnames(PROJECT15) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT15 <- PROJECT15[-length(PROJECT15)]
PROJECT16 <- read.csv("./Data/CARD_SUBWAY_MONTH_202401.CSV", row.names = NULL)
colnames(PROJECT16) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT16 <- PROJECT16[-length(PROJECT16)]
PROJECT17 <- read.csv("./Data/CARD_SUBWAY_MONTH_202402.CSV", row.names = NULL)
colnames(PROJECT17) <- c("사용일자","노선명","역명","승차총승객수","하차총승객수","등록일자","ㅇ")
PROJECT17 <- PROJECT17[-length(PROJECT17)]

project <- rbind(PROJECT01,PROJECT02,PROJECT03,PROJECT04,PROJECT05,PROJECT06,PROJECT07,PROJECT08,PROJECT09,PROJECT10
,PROJECT11,PROJECT12,PROJECT13,PROJECT14,PROJECT15,PROJECT16,PROJECT17)
In [8]:
write.csv(project,"./Data/2020_202402_CARD_SUBWAY_MONTH",row.names=F)
str(project)
summary(project$`승차총승객수`)
summary(project$`하차총승객수`)

project[project$노선명 == "중앙선", ]
'data.frame':	913822 obs. of  6 variables:
 $ 사용일자    : chr  "20200101" "20200101" "20200101" "20200101" ...
 $ 노선명      : chr  "1호선" "1호선" "우이신설선" "우이신설선" ...
 $ 역명        : chr  "종각" "시청" "신설동" "보문" ...
 $ 승차총승객수: num  20427 12126 892 917 2010 ...
 $ 하차총승객수: num  16301 10516 828 855 2363 ...
 $ 등록일자    : chr  "20200104" "20200104" "20200104" "20200104" ...
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
       0     3289     6967     9922    13025 20211204 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      0    3186    6797    9863   12848  136864       1 
A data.frame: 31899 x 6
사용일자노선명역명승차총승객수하차총승객수등록일자
<chr><chr><chr><dbl><dbl><chr>
29820200101중앙선지평 32 2120200104
29920200101중앙선용문 1583 145020200104
30120200101중앙선원덕 241 20520200104
30220200101중앙선양평 1838 179020200104
30320200101중앙선오빈 189 17020200104
30420200101중앙선아신 381 36020200104
30520200101중앙선국수 394 38820200104
30620200101중앙선신원 109 8720200104
30720200101중앙선양수 1330 142420200104
30820200101중앙선운길산 612 60320200104
30920200101중앙선팔당 697 66020200104
31020200101중앙선도심 1134 102920200104
31120200101중앙선덕소 3140 322220200104
31220200101중앙선양정 318 25820200104
31320200101중앙선도농 4041 410420200104
31420200101중앙선구리 6597 690820200104
31520200101중앙선양원 661 70520200104
31620200101중앙선망우 4150 422020200104
31720200101중앙선상봉(시외버스터미널) 3355 324820200104
31820200101중앙선중랑 2849 286520200104
31920200101중앙선회기 130921363320200104
86520200102중앙선회기 271482668220200105
86620200102중앙선중랑 6028 565820200105
86720200102중앙선상봉(시외버스터미널) 6510 610920200105
86820200102중앙선망우 8275 798320200105
86920200102중앙선양원 1755 182320200105
87020200102중앙선구리 147511506220200105
87120200102중앙선도농 10703 947120200105
87220200102중앙선양정 830 71120200105
87320200102중앙선덕소 6401 632520200105
.....................
91319820240228중앙선아신 632 66220240302
91319920240228중앙선국수 708 71720240302
91320020240228중앙선신원 172 16720240302
91320120240228중앙선양수 1932 189820240302
91320220240228중앙선운길산 623 63520240302
91320320240228중앙선팔당 894 89620240302
91320420240228중앙선도심 2581 201020240302
91320520240228중앙선덕소 6045 588420240302
91320620240228중앙선도농 120771126620240302
91320820240229중앙선회기 298282836320240303
91320920240229중앙선양원 2971 285620240303
91321020240229중앙선망우 8205 794420240303
91321120240229중앙선상봉 6246 617620240303
91321220240229중앙선중랑 6231 575720240303
91321520240229중앙선지평 63 7920240303
91321620240229중앙선용문 1791 201920240303
91321720240229중앙선원덕 315 33620240303
91321820240229중앙선양평 3296 368620240303
91321920240229중앙선오빈 330 34920240303
91322020240229중앙선아신 597 60720240303
91322120240229중앙선국수 650 70520240303
91322220240229중앙선신원 175 15120240303
91322320240229중앙선양수 1845 184020240303
91322420240229중앙선운길산 568 55120240303
91322520240229중앙선팔당 771 86720240303
91322620240229중앙선도심 2523 202920240303
91322720240229중앙선덕소 6083 582220240303
91322820240229중앙선양정 555 48520240303
91322920240229중앙선도농 120931140920240303
91381420240229중앙선구리 144441468620240303
In [399]:
sum_on <- sum(project[,4])
sum_out <- sum(project[,5])
In [32]:
library(dplyr)
project_re <- project
project_re$노선명[project_re$노선명 == "9호선2~3단계"] <- "9호선"

project_on <- aggregate(승차총승객수~노선명,project_re,sum)
project_out <- aggregate(하차총승객수~노선명,project_re,sum)
projectsum_on <- sum(project_on[,2])
projectsum_out <- sum(project_out[,2])



project_on$승객비율 <- project_on$`승차총승객수`/sum(project_on$`승차총승객수`)*100
project_on <- arrange(project_on,desc(project_on$`승객비율`))
project_out$승객비율 <- project_out$`하차총승객수`/sum(project_out$`하차총승객수`)*100
project_out <- arrange(project_out,desc(project_out$`승객비율`))

new <- data.frame(노선명 = "기타",
                  승차총승객수 = sum(project_on$`승차총승객수`[15:nrow(project_on)]),
                  승객비율 = sum(project_on$`승차총승객수`[15:nrow(project_on)])/sum(project_on$`승차총승객수`)*100)

project_on <- project_on[-(15:nrow(project_on)), ]
project_on <- rbind(project_on,new)


new1 <- data.frame(노선명 = "기타",
                  하차총승객수 = sum(project_out$`하차총승객수`[15:nrow(project_out)]),
                  승객비율 = sum(project_out$`하차총승객수`[15:nrow(project_out)])/sum(project_out$`하차총승객수`)*100)

project_out <- project_out[-(15:nrow(project_out)), ]
project_out <- rbind(project_out,new1)


cols <-ifelse(project_on$`노선명` %in% c("1호선", "경부선"), "darkblue",
                      ifelse(project_on$`노선명` == "2호선", "green",
                      ifelse(project_on$`노선명` == "3호선", "orange",
                      ifelse(project_on$`노선명` == "4호선", "#63cef5",
                      ifelse(project_on$`노선명` == "5호선", "purple",
                      ifelse(project_on$`노선명` == "6호선", "brown",
                      ifelse(project_on$`노선명` == "7호선", "darkgreen",
                      ifelse(project_on$`노선명` == "8호선", "pink",
                      ifelse(project_on$`노선명` == "9호선" | project_on$`노선명` == "9호선2~3단계", "tan",
                      ifelse(project_on$`노선명` == "경강선", "skyblue", 
                      ifelse(project_on$`노선명`=="수인선"| project_on$`노선명`=="분당선","yellow",
                      ifelse(project_on$`노선명`=="공항철도 1호선","#37b0e1",
                      ifelse(project_on$`노선명`=="안산선","cyan","black")))))))))))))

cols1 <-ifelse(project_out$`노선명` %in% c("1호선", "경부선"), "darkblue",
                      ifelse(project_out$`노선명` == "2호선", "green",
                      ifelse(project_out$`노선명` == "3호선", "orange",
                      ifelse(project_out$`노선명` == "4호선", "#63cef5",
                      ifelse(project_out$`노선명` == "5호선", "purple",
                      ifelse(project_out$`노선명` == "6호선", "brown",
                      ifelse(project_out$`노선명` == "7호선", "darkgreen",
                      ifelse(project_out$`노선명` == "8호선", "pink",
                      ifelse(project_out$`노선명` == "9호선" | project_on$`노선명` == "9호선2~3단계", "tan",
                      ifelse(project_out$`노선명` == "경강선", "skyblue", 
                      ifelse(project_out$`노선명`=="수인선"| project_on$`노선명`=="분당선","yellow",
                      ifelse(project_out$`노선명`=="공항철도 1호선","#37b0e1",
                      ifelse(project_out$`노선명`=="안산선","cyan","black")))))))))))))

# plotrix 패키지 로드
library(plotrix)
label <- paste(project_on$`노선명`,"\n",round(project_on$`승객비율`,2),'%')
label1 <- paste(project_out$`노선명`,"\n",round(project_out$`승객비율`,2),'%')
# 3D 파이 차트 그리기
par(family = "AppleGothic", bg = ' white')
par(mfrow=c(1,2))

pie3D(project_on$`승객비율`, main = "2020~202402 서울 노선별 승차승객비율",
      col= cols,
      labels = ifelse(project_on$`승객비율`>1,label,project_on$`노선명`),
      labelcex = 0.9,
      radius = 0.9,explode = 0.05)      
pie3D(project_out$`승객비율`, main = "2020~202402 서울 노선별 하차승객비율",
      col= cols1,
      labels = ifelse(project_out$`승객비율`>1,label1,project_out$`노선명`),
      labelcex = 0.9,
      radius = 0.9,explode = 0.05)

모든 역을 기준으로 승/하차가 높은 역을 상위5개 표시

출처 : 서울시 열린 데이터 광장 http://data.seoul.go.kr/dataList/OA-12914/S/1/datasetView.do

In [10]:
project_subway_on <- project[,c(3,4,5)] 
project_subway_on <- aggregate(.~역명,project_subway_on,sum)
project_subway_on <- arrange(project_subway_on,desc(승차총승객수))
project_subway_on1 <- arrange(project_subway_on,desc(하차총승객수))


project_subway <- arrange(project_subway_on,desc(승차총승객수))[1:5,1]
project_subway_out <- arrange(project_subway_on,desc(하차총승객수))[1:5,1]

top41 <- c()
for(i in 1:length(project_subway)){
    topTemp1 <- apply(subset(project,역명 == project_subway[i] )[,4:5],2,sum)
    top41 <- cbind(top41,topTemp1)
}
colnames(top41) <- project_subway
top41
par(mfrow=c(1,2))
par(family = "AppleGothic", bg = 'white')
options(scipen = 100)
barplot(top41,ylim=c(0,300000000),main="2020~202402 승차횟수가 높은 5개의 역의 정보",legend.text = T,
col=c('skyblue','orange'))

per <- project_subway_on
per <- per[,2:3]
per <- head(per,5)
per$TotalVotes <- rowSums(per[, 1:2])
per1 <- project_subway_on1
per1 <- per1[,2:3]
per1 <- head(per1,5)
per1$TotalVotes <- rowSums(per[, 1:2])

per$승차비율 <- round((per$`승차총승객수`/per$TotalVotes)*100,2)
per$하차비율 <- round((per$`하차총승객수`/per$TotalVotes)*100,2)
per

per1$하차비율 <- round((per1$`하차총승객수`/per1$TotalVotes)*100,2)
per1$승차비율 <- round((per1$`승차총승객수`/per1$TotalVotes)*100,2)
per1

for (i in 1:length(per$`하차비율`)) {
  text(0.7 + 1.2 * (i - 1), per$`승차총승객수`[i]/2, paste0(round(per$`승차비율`[i], 2), "%"))
  text(0.7 + 1.2 * (i - 1), per$`승차총승객수`[i]+per$`하차총승객수`[i]/2, paste0(round(per$`하차비율`[i], 2), "%"))
}


out <- c()
for(i in 1:length(project_subway_out)){
    topTemp1 <- apply(subset(project,역명 == project_subway_out[i] )[,4:5],2,sum)
    out <- cbind(out,topTemp1)
}
colnames(out) <- project_subway_out 
out

barplot(out,ylim=c(0,300000000),main="2020~202402 하차횟수가 높은 5개의 역의 정보",
col=c('orange','skyblue'),legend.text=c("하차총승객수","승차총승객수"))

for (i in 1:length(per$`승차비율`)) {
  text(0.7 + 1.2 * (i - 1), per1$`하차총승객수`[i]/2, paste0(round(per1$`하차비율`[i], 2), "%"))
  text(0.7 + 1.2 * (i - 1), per1$`하차총승객수`[i]+per1$`승차총승객수`[i]/2, paste0(round(per1$`승차비율`[i], 2), "%"))
}
A matrix: 2 x 5 of type dbl
잠실(송파구청)강남서울역고속터미널홍대입구
승차총승객수118251627108038430106523677104393744101294294
하차총승객수119844711106249865106040215104545416107069412
A data.frame: 5 x 5
승차총승객수하차총승객수TotalVotes승차비율하차비율
<dbl><dbl><dbl><dbl><dbl>
111825162711984471123809633849.6750.33
210803843010624986521428829550.4249.58
310652367710604021521256389250.1149.89
410439374410454541620893916049.9650.04
510129429410706941220836370648.6151.39
A data.frame: 5 x 5
승차총승객수하차총승객수TotalVotes하차비율승차비율
<dbl><dbl><dbl><dbl><dbl>
111825162711984471123809633850.3349.67
210129429410706941221428829549.9747.27
310803843010624986521256389249.9850.83
410652367710604021520893916050.7550.98
510439374410454541620836370650.1750.10
A matrix: 2 x 5 of type dbl
잠실(송파구청)홍대입구강남서울역고속터미널
승차총승객수118251627101294294108038430106523677104393744
하차총승객수119844711107069412106249865106040215104545416

2020~202402 지하철 총 승/하차 승객수

출처 : 서울시 열린 데이터 광장 http://data.seoul.go.kr/dataList/OA-12914/S/1/datasetView.do

In [11]:
PROJECT02[,5][is.na(PROJECT02[,5])] <- 0
In [30]:
#승차
projectsum <- sum(PROJECT01[,4]) #2020
projectsum2 <- sum(PROJECT02[,4]) #2021
projectsum3 <- sum(PROJECT03[,4]) #2022
projectsum4 <- sum(PROJECT04[,4])
projectsum5 <- sum(PROJECT05[,4])
projectsum6 <- sum(PROJECT06[,4])
projectsum7 <- sum(PROJECT07[,4])
projectsum8 <- sum(PROJECT08[,4])
projectsum9 <- sum(PROJECT09[,4])
projectsum10 <- sum(PROJECT10[,4])
projectsum11 <- sum(PROJECT11[,4])
projectsum12 <- sum(PROJECT12[,4])
projectsum13 <- sum(PROJECT13[,4])
projectsum14 <- sum(PROJECT14[,4])
projectsum15 <- sum(PROJECT15[,4]) ##2023
projectsum16 <- sum(PROJECT16[,4])
projectsum17 <- sum(PROJECT17[,4]) #2024

#하차
projectsum18 <- sum(PROJECT01[,5]) #2020
projectsum19 <- sum(PROJECT02[,5]) #2021
projectsum20 <- sum(PROJECT03[,5]) #2022
projectsum21 <- sum(PROJECT04[,5])
projectsum22 <- sum(PROJECT05[,5])
projectsum23 <- sum(PROJECT06[,5])
projectsum24 <- sum(PROJECT07[,5])
projectsum25 <- sum(PROJECT08[,5])
projectsum26 <- sum(PROJECT09[,5])
projectsum27 <- sum(PROJECT10[,5])
projectsum28 <- sum(PROJECT11[,5])
projectsum29 <- sum(PROJECT12[,5])
projectsum30 <- sum(PROJECT13[,5])
projectsum31 <- sum(PROJECT14[,5])
projectsum32 <- sum(PROJECT15[,5]) ##2023
projectsum33 <- sum(PROJECT16[,5])
projectsum34 <- sum(PROJECT17[,5]) #2024

projectsum19

#승차
sums2023 <- c(
  sum(PROJECT04[,4]),
  sum(PROJECT05[,4]),
  sum(PROJECT06[,4]),
  sum(PROJECT07[,4]),
  sum(PROJECT08[,4]),
  sum(PROJECT09[,4]),
  sum(PROJECT10[,4]),
  sum(PROJECT11[,4]),
  sum(PROJECT12[,4]),
  sum(PROJECT13[,4]),
  sum(PROJECT14[,4]),
  sum(PROJECT15[,4])
)

# 데이터프레임으로 변환
sum_df <- data.frame(Year = 2023, Sum = sums2023)
sum_df <- aggregate(Sum~Year,sum_df,sum)

sums2024 <- c(
  sum(PROJECT16[,4]),
  sum(PROJECT17[,4]))

sum_df2024 <- data.frame(Year = 2024, Sum = sums2024)
sum_df2024 <- aggregate(Sum~Year,sum_df2024,sum)

#하차

sums2023_out <- c(
  sum(PROJECT04[,5]),
  sum(PROJECT05[,5]),
  sum(PROJECT06[,5]),
  sum(PROJECT07[,5]),
  sum(PROJECT08[,5]),
  sum(PROJECT09[,5]),
  sum(PROJECT10[,5]),
  sum(PROJECT11[,5]),
  sum(PROJECT12[,5]),
  sum(PROJECT13[,5]),
  sum(PROJECT14[,5]),
  sum(PROJECT15[,5])
)

# 데이터프레임으로 변환
sum_df_out <- data.frame(Year = 2023, Sum = sums2023_out)
sum_df_out <- aggregate(Sum~Year,sum_df_out,sum)

sums2024_out <- c(
  sum(PROJECT16[,5]),
  sum(PROJECT17[,5]))

sum_df2024_out <- data.frame(Year = 2024, Sum = sums2024_out)
sum_df2024_out <- aggregate(Sum~Year,sum_df2024_out,sum)




par(family = 'AppleGothic',bg='white')



sum_data <- data.frame(Project = c("2020", "2021", "2022", "2023", "2024"),
                       Total = c(projectsum, projectsum2, projectsum3, sum_df$Sum, sum_df2024$Sum))
sum_data1 <- data.frame(Project = c("2020", "2021", "2022", "2023", "2024"),
                       Total = c(projectsum18, projectsum19, projectsum20, sum_df_out$Sum, sum_df2024_out$Sum))

sum_data1
                       

# 꺾은선 그래프 그리기
plot(sum_data$Total, type='o',ylim=c(140000000,2800000000),col="darkblue", ylab="승/하차 승객수", xlab="연도", xaxt='n',
main = "2020~202402 지하철 승/하차 승객수")
axis(side=1, at=1:5, labels=sum_data$Project)

lines(sum_data1$Total,col="red",type="o")


for (i in 1:length(sum_data$Total)) {
  text(1.05 + 0.95 * (i - 1), sum_data$`Total`[i]+60000000, format(sum_data$Total[i], big.mark = ","), col = "blue",cex=1.1)
  text(1.05 + 0.95 * (i - 1), sum_data1$`Total`[i]-60000000, format(sum_data1$Total[i], big.mark = ","), col = "red",cex=1.1)
}

legend("topright", legend=c("승차총승객수", "하차총승객수"), fill=c("darkblue", "red"))
2000854031
A data.frame: 5 x 2
ProjectTotal
<chr><dbl>
20201961954817
20212000854031
20222232980722
20232441593134
2024 375787884

1호선~9호선 승/하차 승객수(상위5개역, 하위 5개역)

출처 : 서울시열린데이터광장 http://data.seoul.go.kr/dataList/OA-12914/S/1/datasetView.do

신내, 연신내, 충무로 같은곳은 다른 호선으로 연결되어있어 예상보다 낮게 표시됨

In [29]:
project$`역명`[project$역명 == "동대문역사문화공원(DDP)"] <- "DDP"
project$`역명`[project$역명 == "청량리(서울시립대입구)"] <- "청량리"
project$`역명`[project$역명 == "이촌(국립중앙박물관)"] <- "이촌"
project$`역명`[project$역명 == "경복궁(정부서울청사)"] <- "경복궁"
project$`역명`[project$역명 == "상봉(시외버스터미널)"] <- "상봉"

for (line_num in 1:9) {
  line_name <- paste0(line_num, "호선")
  
  project_line <- subset(project, 노선명 == line_name)[, c(3:5)]
  rownames(project_line) <- NULL
  project_line <- aggregate(.~역명, project_line, sum)
  project_line <- arrange(project_line, desc(승차총승객수))
  project_line <- head(project_line, 5)
  
  project_line_1 <- subset(project, 노선명 == line_name)[, c(3:5)]
  rownames(project_line_1) <- NULL
  project_line_1 <- aggregate(.~역명, project_line_1, sum)
  project_line_1 <- arrange(project_line_1, 승차총승객수)
  project_line_1 <- head(project_line_1, 5)

  

  
  station <- project_line[c(1:5), 1]
  station_1 <- project_line_1[c(1:5), 1]
  
  par(mfrow=c(1,2))
  par(family="AppleGothic", bg='white')
  
  plot(project_line$`승차총승객수`, type='o', ylim=c(0, 130000000), col="darkblue", ylab="승/하차 승객수", xlab="역명", xaxt='n',
       main = paste("2020~202402   ", line_name, " 승/하차 승객수 (상위5개역)"))
  axis(side=1, at=1:5, labels=station)
  lines(project_line$`하차총승객수`, col="red", type="o")
  legend("topright", legend=c("승차총승객수", "하차총승객수"), fill=c("darkblue", "red"))
  for (i in 1:length(project_line$`역명`)) {
    text(1.3 + 0.85 * (i - 1), project_line$`승차총승객수`[i] + 10000000, format(project_line$`승차총승객수`[i], big.mark = ","), col="blue",cex=1.1)
    text(1.3 + 0.85 * (i - 1), project_line$`하차총승객수`[i] - 10000000, format(project_line$`하차총승객수`[i], big.mark = ","), col="red",cex=1.1)
  }
  
  plot(project_line_1$`승차총승객수`, type='o', ylim=c(0, 35000000), col="darkblue", ylab="승/하차 승객수", xlab="역명", xaxt='n',
       main = paste("2020~202402   ", line_name, " 승/하차 승객수 (하위5개역)"))
  axis(side=1, at=1:5, labels=station_1)
  lines(project_line_1$`하차총승객수`, col="red", type="o")
  legend("topright", legend=c("승차총승객수", "하차총승객수"), fill=c("darkblue", "red"))
  for (i in 1:length(project_line$`역명`)) {
    text(1.19 + 0.9 * (i - 1), project_line_1$`승차총승객수`[i] + 1500000, format(project_line_1$`승차총승객수`[i], big.mark = ","), col="blue",cex=1.1)
    text(1.19 + 0.9 * (i - 1), project_line_1$`하차총승객수`[i] - 900000, format(project_line_1$`하차총승객수`[i], big.mark = ","), col="red",cex=1.1)
  }
}
In [23]:
for (line_num in 1:9) {
  line_name <- paste0(line_num, "호선")
  
  project_line <- subset(project, 노선명 == line_name)[, c(3:5)]
  rownames(project_line) <- NULL
  project_line <- aggregate(.~역명, project_line, sum)
  project_line <- arrange(project_line, desc(승차총승객수))
  project_line <- head(project_line, 10)
  
  project_line_1 <- subset(project, 노선명 == line_name)[, c(3:5)]
  rownames(project_line_1) <- NULL
  project_line_1 <- aggregate(.~역명, project_line_1, sum)
  project_line_1 <- arrange(project_line_1, 승차총승객수)
  project_line_1 <- head(project_line_1, 10)


  total_passengers <- sum(project_line$`승차총승객수`) # 전체 승차총승객수 계산
  project_line$percentages <- (project_line$`승차총승객수` / total_passengers) * 100 # 각 역별 비율 계산

  total_passengers_1 <- sum(project_line_1$`승차총승객수`) # 전체 승차총승객수 계산
  project_line_1$percentages <- (project_line_1$`승차총승객수` / total_passengers_1) * 100 # 각 역별 비율 계산

  total_passengers_out <- sum(project_line$`하차총승객수`) # 전체 승차총승객수 계산
  project_line$percentages_out <- (project_line$`하차총승객수` / total_passengers_out) * 100 # 각 역별 비율 계산

  total_passengers_1_out <- sum(project_line_1$`하차총승객수`) # 전체 승차총승객수 계산
  project_line_1$percentages_out <- (project_line_1$`하차총승객수` / total_passengers_1_out) * 100 # 각 역별 비율 계산
  
all_colors <- colors()
random_color <- sample(all_colors, 10, replace = FALSE)
  
  station <- project_line[c(1:5), 1]
  station_1 <- project_line_1[c(1:5), 1]
  
  par(mfrow=c(2,2))
  par(family="AppleGothic", bg='white')
  
pie3D(project_line$percentages, main = paste("2020~202402   ", line_name, " 승차 승객수 (상위10개역)"),
      col=  random_color,
      labels = ifelse(project_line$percentages>5,paste(project_line$`역명`,paste0(round(project_line$percentages,2),"%"),sep = "\n"),project_line$`역명`),
      labelcex = 0.9,
      radius = 0.9,explode = 0.05)      
  
pie3D(project_line$percentages_out, main = paste("2020~202402   ", line_name, " 하차 승객수 (상위10개역)"),
      col= random_color,
      labels = ifelse(project_line$percentages_out>5,paste(project_line$`역명`,paste0(round(project_line$percentages_out,2),"%"),sep = "\n"),project_line$`역명`),
      labelcex = 0.9,
      radius = 0.9,explode = 0.05) 

pie3D(project_line_1$percentages, main = paste("2020~202402   ", line_name, " 승차 승객수 (하위10개역)"),
      col= random_color,
      labels = ifelse(project_line_1$percentages>5, paste(project_line_1$`역명`, paste0(round(project_line_1$percentages, 2), "%"), sep = "\n"),project_line_1$`역명`),
      labelcex = 0.9,
      radius = 0.9,explode = 0.05)      
      



pie3D(project_line_1$percentages_out, main = paste("2020~202402   ", line_name, " 하차 승객수 (하위10개역)"),
      col= random_color,
      labels =  ifelse(project_line_1$percentages_out>5,paste(project_line_1$`역명`,paste0(round(project_line_1$percentages_out,2),"%"),sep = "\n"),project_line_1$`역명`),
      labelcex = 0.8,
      radius = 0.9,explode = 0.05)                 

}

중앙선 승/하차 승객수(상위6개역, 하위 6개역)

출처 : 서울시열린데이터광장 http://data.seoul.go.kr/dataList/OA-12914/S/1/datasetView.do

In [21]:
library(dplyr)

  project_line <- subset(project, 노선명 == "중앙선")[, c(3:5)]
  rownames(project_line) <- NULL
  project_line <- aggregate(.~역명, project_line, sum)
  project_line <- arrange(project_line, desc(승차총승객수))
  project_line <- head(project_line, 6)
  
  project_line_1 <- subset(project, 노선명 == "중앙선")[, c(3:5)]
  rownames(project_line_1) <- NULL
  project_line_1 <- aggregate(.~역명, project_line_1, sum)
  project_line_1 <- arrange(project_line_1, 승차총승객수)
  project_line_1 <- head(project_line_1, 6)


  

  
  station <- project_line[c(1:6), 1]
  station_1 <- project_line_1[c(1:6), 1]
  
  par(mfrow=c(1,2))
  par(family="AppleGothic", bg='white')
  
  plot(project_line$`승차총승객수`, type='o', ylim=c(0, 40000000), col="darkblue", ylab="승/하차 승객수", xlab="역명", xaxt='n',
       main = "2020~202402 중앙선 승/하차 승객수 (상위6개역)")
  axis(side=1, at=1:6, labels=station)
  lines(project_line$`하차총승객수`, col="red", type="o")
  legend("topright", legend=c("승차총승객수", "하차총승객수"), fill=c("darkblue", "red"))
  for (i in 1:length(project_line$`역명`)) {
    text(1.3 + 0.89 * (i - 1), project_line$`승차총승객수`[i] + 2000000, format(project_line$`승차총승객수`[i], big.mark = ","), col="blue",cex=1.1)
    text(1.3 + 0.89 * (i - 1), project_line$`하차총승객수`[i] - 2500000, format(project_line$`하차총승객수`[i], big.mark = ","), col="red",cex=1.1)
  }
  
  plot(project_line_1$`승차총승객수`, type='o', ylim=c(0, 1200000), col="darkblue", ylab="승/하차 승객수", xlab="역명", xaxt='n',
       main ="2020~202402 중앙선 승/하차 승객수 (하위6개역)")
  axis(side=1, at=1:6, labels=station_1)
  lines(project_line_1$`하차총승객수`, col="red", type="o")
  legend("topright", legend=c("승차총승객수", "하차총승객수"), fill=c("darkblue", "red"))
  for (i in 1:length(project_line$`역명`)) {
    text(1.2 + 0.91 * (i - 1), project_line_1$`승차총승객수`[i] + 50000, format(project_line_1$`승차총승객수`[i], big.mark = ","), col="blue",cex=1.1)
    text(1.16 + 0.91 * (i - 1), project_line_1$`하차총승객수`[i] - 70000, format(project_line_1$`하차총승객수`[i], big.mark = ","), col="red",cex=1.1)
  }

  
 project_line <- subset(project, 노선명 == "중앙선")[, c(3:5)]
  rownames(project_line) <- NULL
  project_line <- aggregate(.~역명, project_line, sum)
  project_line <- arrange(project_line, desc(승차총승객수))
  project_line <- head(project_line, 10)
  
  project_line_1 <- subset(project, 노선명 == "중앙선")[, c(3:5)]
  rownames(project_line_1) <- NULL
  project_line_1 <- aggregate(.~역명, project_line_1, sum)
  project_line_1 <- arrange(project_line_1, 승차총승객수)
  project_line_1 <- head(project_line_1, 10)


  total_passengers <- sum(project_line$`승차총승객수`) # 전체 승차총승객수 계산
  project_line$percentages <- (project_line$`승차총승객수` / total_passengers) * 100 # 각 역별 비율 계산

  total_passengers_1 <- sum(project_line_1$`승차총승객수`) # 전체 승차총승객수 계산
  project_line_1$percentages <- (project_line_1$`승차총승객수` / total_passengers_1) * 100 # 각 역별 비율 계산

  total_passengers_out <- sum(project_line$`하차총승객수`) # 전체 승차총승객수 계산
  project_line$percentages_out <- (project_line$`하차총승객수` / total_passengers_out) * 100 # 각 역별 비율 계산

  total_passengers_1_out <- sum(project_line_1$`하차총승객수`) # 전체 승차총승객수 계산
  project_line_1$percentages_out <- (project_line_1$`하차총승객수` / total_passengers_1_out) * 100 # 각 역별 비율 계산
  
all_colors <- colors()
random_color <- sample(all_colors, 10, replace = FALSE)
  
  station <- project_line[c(1:10), 1]
  station_1 <- project_line_1[c(1:10), 1]
  
  par(mfrow=c(2,2))
  par(family="AppleGothic", bg='white')
  
pie3D(project_line$percentages, main = "2020~202402 중앙선 승차 승객수 (상위10개역)",
      col=  random_color,
      labels = ifelse(project_line$percentages>5,paste(project_line$`역명`,paste0(round(project_line$percentages,2),"%"),sep = "\n"),project_line$`역명`),
      labelcex = 0.9,
      radius = 0.9,explode = 0.05)      
  
pie3D(project_line$percentages_out, main = "2020~202402 중앙선 하차 승객수 (상위10개역)",
      col= random_color,
      labels = ifelse(project_line$percentages_out>5,paste(project_line$`역명`,paste0(round(project_line$percentages_out,2),"%"),sep = "\n"),project_line$`역명`),
      labelcex = 0.9,
      radius = 0.9,explode = 0.05) 

pie3D(project_line_1$percentages, main = "2020~202402 중앙선 승차 승객수 (하위10개역)",
      col= random_color,
      labels = ifelse(project_line_1$percentages>5, paste(project_line_1$`역명`, paste0(round(project_line_1$percentages, 2), "%"), sep = "\n"),project_line_1$`역명`),
      labelcex = 0.9,
      radius = 0.9,explode = 0.05)      


pie3D(project_line_1$percentages_out, main = "2020~202402 중앙선 하차 승객수 (하위10개역)",
      col= random_color,
      labels =  ifelse(project_line_1$percentages_out>5,paste(project_line_1$`역명`,paste0(round(project_line_1$percentages_out,2),"%"),sep = "\n"),project_line_1$`역명`),
      labelcex = 0.9,
      radius = 0.9,explode = 0.05)                 


project_line_1
project_line
A data.frame: 10 x 5
역명승차총승객수하차총승객수percentagespercentages_out
<chr><dbl><dbl><dbl><dbl>
1지평 88854 93840 0.9064445 0.9829888
2신원 332093 279760 3.3878481 2.9305300
3오빈 425820 422294 4.3440045 4.4235961
4원덕 542820 519127 5.5375805 5.4379370
5아신 830435 822395 8.4716861 8.6147170
6양정 1004053 86190810.2428509 9.0286219
7국수 1027823101413710.485340610.6232446
8운길산1287494123356813.134375412.9218189
9팔당 1452296150273614.815603715.7413960
10양수 2810788279663128.674265629.2951497
A data.frame: 10 x 5
역명승차총승객수하차총승객수percentagespercentages_out
<chr><dbl><dbl><dbl><dbl>
1회기340554723283234530.85789030.719740
2구리179687941827632716.28164417.100332
3도농139513601279851412.64142011.974990
4망우1049899910273929 9.513213 9.612850
5상봉 8068649 7967147 7.311056 7.454499
6덕소 7614960 7540158 6.899966 7.054985
7중랑 7602951 7259862 6.889084 6.792724
8양평 4199634 4244835 3.805316 3.971700
9도심 3240400 2587468 2.936148 2.420977
10양원 3161066 3096444 2.864263 2.897203

노선별 승/하차비율

  • 도심주변을 순환으로도는 2호선이 가장 많고, 2번째는 7호선과 5호선이 이어진다.

연도별 승/하차​

  • 2020년부터 비교시 2024년 2월까지 자료밖에없으니 제외하고 2020년부터 2023년 으로 가면갈수록 지하철 이용자수가 늘어나고 잇다는 것을 알수 있고,

2020~202402 종합 지하철 승차

  • 종합 지하철 승/하차 횟수는 잠실,강남,서울역,고속터미널,홍대,하차횟수는 잠실,홍대,강남,서울역,고속터미널 순으로,전체적으로 사람이 많은 2호선위주로 사람들이 많이 이용한다는 것을 알수있다. ​ ### 각 호선별 승/하차 비율
  • 1호선: 서울역, 종각
  • 2호선: 강남, 잠실
  • 3호선: 고속터미널, 연신내
  • 4호선: 수유, 혜화
  • 5호선: 광화문, 까치산
  • 6호선: 공덕, 응암
  • 7호선: 가산디지털단지,광명사거리
  • 8호선: 문정, 암사
  • 9호선: 신논현, 노량진
  • 중앙선: 회기, 구리 순으로 높게 측정되었다.

결론

  • 환승역이거나, 번화가 혹은 ktx와 같은 고속철도가 연결된 역에 사람들이 많이 이용한다는 것을 알 수 있다.
  • 승차가 많은 역은 대부분 하차도 높다는 것을 알 수 있다. (승/하차중 몇 역들은 다른호선이랑 겹쳐서 하차혹은 승차가0으로 표시되어있음)
  • 중앙선에서는 특히 지평역이 열차가 평일상하행12회, 주말공휴일에 10회정도밖에 운영을 하지않는데다가, 배차시간이 매우 길기 때문에 가장 이용률이 낮은걸로 집계된다. ​